library(pheatmap)
library(stringr)
library(Biobase)
library(ggplot2)
library(Mfuzz)
load("analysis/obj/palmieri_final.rda")
res.IPscr_IPinh.ext<-read.delim(file="analysis/result/conventional/res.IPscr_IPinh.txt")
Genes were selected based on raw p-values < 0.05
#select genes
idx.genes2cluster<-res.IPscr_IPinh.ext$P.Value<0.05
table(idx.genes2cluster)
## idx.genes2cluster
## FALSE TRUE
## 15950 163
#get the normalized signal intensities of all arrays
genes2cluster<-res.IPscr_IPinh.ext[idx.genes2cluster,
grep(".CEL", colnames(res.IPscr_IPinh.ext))]
rownames(genes2cluster)<-res.IPscr_IPinh.ext[idx.genes2cluster, "SYMBOL"]
condition_names <- ifelse(str_detect(pData(palmieri_final)$condition,
"input"), "input", "IP")
treatment_names <- ifelse(str_detect(pData(palmieri_final)$treatment,
"scr"), "scr", "inh")
annotation_for_heatmap <-data.frame(condition = condition_names, treatment = treatment_names)
row.names(annotation_for_heatmap) <- row.names(pData(palmieri_final))
anno_colors <- list( condition= c(input = "chartreuse4", IP = "burlywood3"),
treatment= c(inh = "blue4", scr = "cadetblue2"))
pheatmap(genes2cluster, annotation_col = annotation_for_heatmap,
annotation_colors = anno_colors, scale = "row", show_rownames = F)
Row-wise z-score were calculated of the selected genes and k-means clustering was applied using 10 clusters
mat.scaled<-t(apply(genes2cluster,1,scale))
colnames(mat.scaled)<-colnames(genes2cluster)
lv.samp<-colnames(mat.scaled)
i=10
#run clustering
k<-kmeans(mat.scaled, i,20)
#get cluster centers
print(pheatmap(k$centers, annotation_col = annotation_for_heatmap,
cluster_cols=T, annotation_colors =anno_colors ))
clust.centers<-data.frame(k$centers)
df.cluster.centers<-reshape(clust.centers,idvar="cluster",ids=row.names(clust.centers),
times=names(clust.centers), timevar="sample",varying=list(names(clust.centers)),
direction = "long", v.names="center")
df.cluster.centers$cluster<-factor(df.cluster.centers$cluster, levels=(1:i))
df.cluster.centers$sample<-factor(df.cluster.centers$sample,levels=names(clust.centers))
p<-ggplot(df.cluster.centers, aes(x=sample, y=center, color=cluster, group=1))
p<-p+facet_grid(cluster ~ .)
p<-p+geom_line(lwd=2)+ theme(axis.text.x = element_text(angle = 90, hjust = 1))+
xlab("")
print(p)
anno_row=data.frame(cluster=as.factor(k$cluster))
pheatmap(mat.scaled[order(k$cluster),], cluster_cols=F,
cluster_rows = F,show_rownames = F,
annotation_row =anno_row ,
annotation_col=annotation_for_heatmap,annotation_colors = anno_colors)
selected.probes<-as.character(res.IPscr_IPinh.ext[idx.genes2cluster,"PROBEID"])
eset_final.s<-standardise(palmieri_final[selected.probes,])
## get m parameter
m1<-mestimate(eset_final.s)
m1
## [1] 2.03368
cl<-mfuzz(eset_final.s,c=14, m=m1)
#show cluster overlap
O<-overlap(cl)
ptmp<-overlap.plot(cl,over=O,thres = 0.05)
n.cluster<-nrow(cl$centers)
member.thr<-0.25
# plot settings
exprs.mat<-exprs(eset_final.s)
max.exprs<-max(exprs.mat)
min.exprs<-min(exprs.mat)
for(i in 1:n.cluster){
#get members
members<-cl$membership[(cl$membership[,i]>member.thr),i]
cl.exprs<-exprs.mat[names(members),]
plot.data<-reshape2::melt(cl.exprs)
colnames(plot.data)<-c("gene_name","array","z_scores")
#attach membership
plot.data2<-cbind(plot.data,membership=members[match(plot.data$gene_name,names(members))])
#high membership in the front
plot.data2$gene_name<-factor(plot.data2$gene_name,levels = names(members)[order(members)])
p<-ggplot(plot.data2, aes(x=array, y=z_scores, group=gene_name, color=membership))
p<-p+geom_line()+theme_classic()+
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
xlab("")+scale_color_gradientn(colors = c("yellow","greenyellow","cyan","royalblue","deeppink","red"),
limits=c(0.25,1))+ggtitle(paste0("cluster",i))
print(p)
cluster.table<-cbind(exprs(palmieri_final)[rownames(cl.exprs),],
fData(palmieri_final)[match(rownames(cl.exprs),
fData(palmieri_final)$PROBEID), c("SYMBOL","GENENAME")],
cluster.membership=members[rownames(cl.exprs)])
}
sessionInfo()
## R version 3.6.0 (2019-04-26)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS Mojave 10.14.3
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.6/Resources/lib/libRlapack.dylib
##
## locale:
## [1] C
##
## attached base packages:
## [1] tcltk parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] Mfuzz_2.45.0 DynDoc_1.63.0 widgetTools_1.63.0
## [4] e1071_1.7-2 ggplot2_3.2.0 Biobase_2.45.0
## [7] BiocGenerics_0.31.4 stringr_1.4.0 pheatmap_1.0.12
##
## loaded via a namespace (and not attached):
## [1] Rcpp_1.0.1 plyr_1.8.4 compiler_3.6.0
## [4] pillar_1.4.1 RColorBrewer_1.1-2 tkWidgets_1.63.0
## [7] class_7.3-15 tools_3.6.0 digest_0.6.19
## [10] evaluate_0.14 tibble_2.1.3 gtable_0.3.0
## [13] pkgconfig_2.0.2 rlang_0.3.4 rstudioapi_0.10
## [16] yaml_2.2.0 xfun_0.8 withr_2.1.2
## [19] dplyr_0.8.1 knitr_1.23 grid_3.6.0
## [22] tidyselect_0.2.5 glue_1.3.1 R6_2.4.0
## [25] rmarkdown_1.13 reshape2_1.4.3 purrr_0.3.2
## [28] magrittr_1.5 scales_1.0.0 htmltools_0.3.6
## [31] assertthat_0.2.1 colorspace_1.4-1 labeling_0.3
## [34] stringi_1.4.3 lazyeval_0.2.2 munsell_0.5.0
## [37] crayon_1.3.4